## Reproducibility: fixes the RNG for the simulation-based prediction
## draws (rnorm calls) later in this script
set.seed(11)
library(BayesComposition)
library(knitr)
library(ggplot2)
library(gjam)
library(rioja)
library(analogue)
## Dataset dimensions. NOTE(review): the calibration objects y (counts) and
## X (water table depth) are used below but never created in this script --
## presumably loaded by an earlier Rmd chunk; confirm before running standalone.
N <- 500         # number of calibration observations (rows of y)
N_pred <- 25     # number of reconstruction observations (reassigned later from pred$X)
d <- 4           # species count used in the fit plots -- NOTE(review): must equal ncol(y); verify
n_knots <- 30    # number of knots for the basis expansion grid X_knots
## Long-format frame of species proportions vs. the covariate for plotting.
## NOTE(review): y_prop is only (re)defined much later in this extracted
## script (knitr chunk ordering) -- it must already exist in the session.
testatePlotData <- data.frame(species=as.factor(rep(colnames(y), each=N)),
Count=c(as.matrix(y_prop)),
Wetness=rep(X, times=dim(y)[2]))
## Scatter of composition vs. water table depth, one color per species;
## legend suppressed because there are many species
ggplot(testatePlotData, aes(x=Wetness, y=Count, color=species, group=species)) +
geom_point(alpha=0.25) +
# geom_line(stat="smooth", method="loess", aes(y=Count, x=Wetness),
# alpha=0.5, lwd=1.25) +
theme(legend.position="none") + ggtitle("Testate Composition vs. Water Table Depth") +
labs(x="Water Table Depth", y="Composition") +
theme(plot.title=element_text(size=24, face="bold", hjust=0.5)) +
theme(axis.text.x = element_text(size = 22),
axis.text.y = element_text(size = 22),
axis.title.x = element_text(size = 22),
axis.title.y = element_text(size = 22))
## Ensure counts are a matrix for the row-wise arithmetic below
y <- as.matrix(y)
## Covariate moments kept for reference; standardization is disabled
mean_X <- mean(X)
sd_X <- sd(X)
# X <- (X - mean_X) / sd_X
## Knot grid extending 1.25 SD beyond the observed covariate range.
## NOTE(review): X_knots is not referenced again in this script -- presumably
## consumed inside fit_compositional_data via `params`; confirm.
X_knots <- seq(min(X, na.rm=TRUE)-1.25*sd(X, na.rm=TRUE),
max(X, na.rm=TRUE)+1.25*sd(X, na.rm=TRUE), length=n_knots)
## MCMC settings: 10k adaptation + 20k sampling iterations thinned by 20,
## Dirichlet-multinomial likelihood with a B-spline basis (df=8),
## 4 chains run on 4 cores, progress message every 100 iterations
params <- list(n_adapt=10000, n_mcmc=20000, n_thin=20,
likelihood="dirichlet-multinomial",
function_type = "basis",
additive_correlation=FALSE,
multiplicative_correlation=FALSE,
message=100, n_chains=4, n_cores=4, df=8)
## Output/progress locations used by the fit-or-load logic below
save_directory <- "./mvgp-test/"
save_file <- "mvgp-dm-testate-bspline.RData"
progress_directory <- "./mvgp-test/"
progress_file <- "mvgp-dm-testate-bspline.txt"
## Load cached MCMC output if a previous run saved it; otherwise run the
## (long) model fit, which writes progress to progress_directory/progress_file
## and saves its result to save_directory/save_file for future runs.
## Fix: pass the progress_directory/progress_file variables defined above
## instead of re-hard-coding the same paths inline.
if (file.exists(paste0(save_directory, save_file))) {
  ## check if MCMC output exists
  load(paste0(save_directory, save_file))
} else {
  ## potentially long running MCMC code
  out <- fit_compositional_data(y=y, X=X, params=params,
                                progress_directory = progress_directory,
                                progress_file = progress_file,
                                save_directory = save_directory,
                                save_file = save_file)
}
## Loading required package: snow
## Warning in searchCommandline(parallel, cpus = cpus, type = type,
## socketHosts = socketHosts, : Unknown option on commandline:
## rmarkdown::render('/Users/Tips/BayesComposition/fit-mvgp-dirichlet-
## multinomial-testate.Rmd',~+~~+~encoding~+~
## R Version: R version 3.4.1 (2017-06-30)
## snowfall 1.84-6.1 initialized (using snow 0.4-2): parallel execution on 4 CPUs.
## Loading required namespace: rlecuyer
## Library BayesComposition loaded.
## Library BayesComposition loaded in cluster.
## Library coda loaded.
## Library coda loaded in cluster.
##
## Stopping cluster
## extract posterior samples from the fitted (or re-loaded) model object
samples <- extract_compositional_samples(out)
alpha_post <- samples$alpha   # Dirichlet-multinomial intensity draws
beta_post <- samples$beta     # basis coefficient draws
## Gelman-Rubin convergence diagnostic across the 4 chains
Rhat <- make_gelman_rubin(out)
layout(matrix(1:3, 3, 1))
# hist(Rhat[grepl("alpha=", names(Rhat))], main = "Rhat for alpha")
hist(Rhat[grepl("beta", names(Rhat))], main = "Rhat for beta")
hist(Rhat, main="All parameters")
## Mask non-finite diagnostics before reporting the worst-case Rhat.
## NOTE(review): this masking happens AFTER the histograms above -- if Rhat
## ever contains Inf the hist() calls would fail first; confirm acceptable.
Rhat[!is.finite(Rhat)] <- NA
max(unlist(na.omit(Rhat)))
## [1] 1.026868
## Posterior mean of alpha per (observation, species) cell, averaging over
## the MCMC sample dimension
alpha_post_mean <- apply(alpha_post, c(2, 3), mean)
## force the sum to one constraint: row-normalize so each observation's
## fitted composition sums to one. Vectorized with rowSums() -- identical
## values to the original per-row loop (which also used the unsafe 1:N
## index pattern); a row summing to zero yields NaN in both formulations.
p_alpha_post_mean <- alpha_post_mean / rowSums(alpha_post_mean)
## Observed counts as within-sample proportions, same normalization
y_percentages <- y / rowSums(y)
## Long-format frame combining observed proportions and fitted posterior
## means for plotting. NOTE(review): assumes d == ncol(y) so the species
## labels line up with the flattened matrices -- verify.
fitPlotData <- data.frame(species=as.factor(rep(1:d, each=N)),
count=c(y_percentages),
depth=rep(X, times=d),
alpha=c(p_alpha_post_mean))
## Observed points with fitted-mean curves, all species on one panel
g1_post <- ggplot(fitPlotData, aes(x=depth, y=count, color=species, group=species)) +
geom_point(alpha=0.25) + theme(legend.position="none") +
ggtitle("MVGP vs. depth") +
geom_line(aes(x=depth, y=alpha, col = species), fitPlotData, lwd=1.25)
## Same content faceted one panel per species
g2_post <- ggplot(fitPlotData, aes(x=depth, y=count, color=species, group=species)) +
geom_point(alpha=0.25) + theme(legend.position="none") +
ggtitle("MVGP vs. depth by species") +
geom_line(aes(x=depth, y=alpha, col = species), fitPlotData, lwd=1.25) +
facet_wrap( ~ species, ncol = 4)
multiplot(g1_post, g2_post, cols=2)
## Loading required package: grid
##
## Reconstruction period data at Hole Bog
##
## Reconstruction-period counts at Hole Bog. The raw csv has ragged rows,
## so read it as plain text and re-pad each line to a rectangle by hand.
fileToLoad <- '~/testate/data/Hole Bog 2005 Core, raw data.csv'
## widest row determines the target field count
n = max(count.fields(fileToLoad, sep=","), na.rm=TRUE)
xx = readLines(fileToLoad)
## drop header lines 1-6 and trailing rows 47-56 (metadata, not counts)
xx = xx[-c(1:6, 47:56)]
## Split one raw csv line into its fields and pad (or truncate) to exactly
## n entries so ragged rows can be cbind-ed into a rectangular matrix.
##
## x   : a single character string (one line of the file)
## sep : field separator. Fix: the original ignored this argument and
##       hard-coded "," inside strsplit; it is now honored.
## n   : target field count; shorter rows are NA-padded, longer truncated
.splitvar <- function(x, sep, n) {
  var <- unlist(strsplit(x, split = sep))
  length(var) <- n   # length<- pads with NA or truncates as needed
  var
}
## Rebuild a rectangle: one padded column per original line, then transpose
## back to one csv string per row and parse normally
xx = do.call(cbind, lapply(xx, .splitvar, sep=",", n=n))
xx = apply(xx, 1, paste, collapse=",")
y_recon = read.csv(text=xx, sep=",", header=TRUE)
## processing data to have names first 3 letters of genus and species:
## strip the suffixes ".type", "..", ".minor", ".catinus", take the text
## after the last remaining ".", and keep its first 3 characters
genus_hole <- tolower(substr(names(y_recon), 1, 3))
species_hole <- substr(sub(".*\\.", "", sub("\\.catinus", "", sub("\\.minor", "", sub("\\.\\.", "", sub("\\.type", "", names(y_recon)))))), 1, 3)
## e.g. "Arcella.catinus.type" -> "arccat"-style abbreviated name
names(y_recon) <- paste(genus_hole, species_hole, sep="")
source("~/testate/data/join-testate-caribou.R")
##
## align names between data sets
## get expert input here!!
## We are removing many species
##
## total count before alignment, used to report how much data is dropped
N_recon_full <- sum(y_recon)
idx1 <- colnames(y) %in% names(y_recon)
# names(y_recon)[!idx1]
# names(y_cal)[!idx2]
## add in any species in calibration data not in reconstruction data
## (as all-zero columns so the two data sets share a column set)
for (i in colnames(y)[!idx1]){
y_recon[, i] <- rep(0, dim(y_recon)[1])
}
idx2 <- names(y_recon) %in% colnames(y)
## remove species in reconstruction data not in calibration data
for (i in names(y_recon)[!idx2]){
y_recon[, i] <- NULL
}
## sort columns alphabetically to match the calibration ordering
y_recon <- y_recon[, order(colnames(y_recon))]
## check the names and orderings
all.equal(colnames(y), colnames(y_recon))
## [1] TRUE
## Note: we are reducing the testate counts by about 25%...
N_recon_reduced <- sum(y_recon)
N_recon_full - N_recon_reduced
## [1] 903
## MVGP inverse prediction: sample the unobserved covariate for each
## Hole Bog sample given its counts and the calibration posterior.
## NOTE(review): the role of X here (calibration covariate passed to the
## predictor) is inferred from usage -- confirm against
## predict_compositional_data() documentation.
pred <- predict_compositional_data(as.matrix(y_recon), X,
params=params, samples=samples,
progress_directory=progress_directory,
progress_file = "DM-predict.txt")
## Starting predictions, running for 4000 iterations
## Bug - ESS for X shrunk to current position
## Bug - ESS for X shrunk to current position
## Prediction iteration 100
## Prediction iteration 200
## Prediction iteration 300
## Prediction iteration 400
## Prediction iteration 500
## Prediction iteration 600
## Prediction iteration 700
## Prediction iteration 800
## Prediction iteration 900
## Prediction iteration 1000
## Prediction iteration 1100
## Prediction iteration 1200
## Prediction iteration 1300
## Prediction iteration 1400
## Prediction iteration 1500
## Prediction iteration 1600
## Prediction iteration 1700
## Prediction iteration 1800
## Prediction iteration 1900
## Prediction iteration 2000
## Prediction iteration 2100
## Prediction iteration 2200
## Prediction iteration 2300
## Prediction iteration 2400
## Prediction iteration 2500
## Prediction iteration 2600
## Prediction iteration 2700
## Prediction iteration 2800
## Prediction iteration 2900
## Prediction iteration 3000
## Prediction iteration 3100
## Prediction iteration 3200
## Prediction iteration 3300
## Prediction iteration 3400
## Prediction iteration 3500
## Prediction iteration 3600
## Prediction iteration 3700
## Prediction iteration 3800
## Prediction iteration 3900
## Prediction iteration 4000
## Dimensions of the posterior prediction matrix (samples x observations)
n_samples <- length(pred$X[, 1])
N_pred <- length(pred$X[1, ])
## Central 95% interval per observation: sort each column of draws and keep
## the middle 95% of the sorted samples.
## NOTE(review): (0.025*n_samples+1):(0.975*n_samples) only gives whole
## indices when n_samples is a multiple of 40 -- confirm.
X_ci <- apply(pred$X, 2, sort)[(0.025*n_samples+1):(0.975*n_samples), ]
hole.df <- data.frame(
Covariate=c(X_ci),
Observation=factor(rep((1:N_pred), each=n_samples*0.95))
)
## Violin of the trimmed posterior per core depth.
## NOTE(review): the y label says "July Temperature" but the covariate
## throughout this script is water table depth -- likely a copy-paste label.
ggplot(hole.df, aes(Observation, Covariate)) +
geom_violin(position="identity") +
scale_x_discrete(breaks=seq(5, 25, 5)) +
labs(x="Observation", y="Unobserved July Temperature")
n_samples <- length(pred$X[, 1])
N_pred <- length(pred$X[1, ])
## Draw pseudo posterior samples for the comparison methods from normal
## approximations (mean/sd per observation).
## NOTE(review): pred_mu_WA, pred_sd_WA, etc. are defined much later in
## this extracted script (knitr chunk ordering) -- they must already exist.
preds_WA <- matrix(0, n_samples, N_pred)
preds_MLRC <- matrix(0, n_samples, N_pred)
preds_MAT <- matrix(0, n_samples, N_pred)
# preds_GJAM <- matrix(0, n_samples, d)
for (i in 1:N_pred) {
preds_WA[, i] <- rnorm(n_samples, pred_mu_WA[i], pred_sd_WA[i])
preds_MLRC[, i] <- rnorm(n_samples, pred_mu_MLRC[i], pred_sd_MLRC[i])
preds_MAT[, i] <- rnorm(n_samples, pred_mu_MAT[i], pred_sd_MAT[i])
# preds_GJAM[, j] <- rnorm(n_samples, pred_mu_GJAM, pred_sd_GJAM)
}
## Stack the trimmed middle-95% samples of all five methods; note preds_rf
## is (observation x tree), hence the margin-1 apply for the RF rows
X_ci <- rbind(apply(pred$X, 2, sort)[(0.025*n_samples+1):(0.975*n_samples), ],
apply(preds_WA, 2, sort)[(0.025*n_samples+1):(0.975*n_samples), ],
apply(preds_MLRC, 2, sort)[(0.025*n_samples+1):(0.975*n_samples), ],
apply(preds_MAT, 2, sort)[(0.025*n_samples+1):(0.975*n_samples), ],
apply(preds_rf, 1, sort)[(0.025*n_samples+1):(0.975*n_samples), ])
# apply(preds_GJAM, 2, sort)[(0.025*n_samples+1):(0.975*n_samples), ])
## Column-major flattening gives, per observation, 5 stacked model segments
## of 0.95*n_samples draws each; the Model labels recycle to match
hole.df <- data.frame(
Covariate=c(X_ci),
Observation=factor(rep((1:N_pred), each=5*n_samples*0.95)),
Model=rep(c("MVGP", "WA", "MLRC", "MAT", "RF"), each=n_samples*0.95)
)
ggplot(hole.df, aes(Observation, Covariate, color=Model)) +
geom_violin(position="dodge") +
scale_x_discrete(breaks=seq(5, 25, 5)) +
labs(x="Observation", y="Unobserved Water Depth") +
facet_wrap(~ Model, ncol=2) +
stat_summary(fun.y = mean, geom = "line", aes(Observation, Covariate, group=Model))
## Posterior mean lines with 95% quantile ribbons, all models overlaid.
## ylim(-10, 60) clips values outside that range, producing the
## "Removed ... rows" warnings below.
## NOTE(review): fun.y/fun.ymin/fun.ymax are deprecated in ggplot2 >= 3.3
## in favor of fun/fun.min/fun.max -- works but warns on modern ggplot2.
ggplot(hole.df, aes(Observation, Covariate, color=Model)) +
scale_x_discrete(breaks=seq(5, 25, 5)) +
labs(x="Observation", y="Unobserved Water Depth") +
stat_summary(fun.y = mean, geom = "line", aes(Observation, Covariate, group=Model)) +
stat_summary(fun.ymin = function(z) {quantile(z, 0.025)},
fun.ymax = function(z) {quantile(z, 0.975)},
geom = "ribbon", aes(Observation, Covariate,
group=Model, color=Model, fill=Model), alpha=0.25) +
ylim(-10, 60)
## Warning: Removed 2148 rows containing non-finite values (stat_summary).
## Warning: Removed 2148 rows containing non-finite values (stat_summary).
## Same summary, faceted one panel per model
ggplot(hole.df, aes(Observation, Covariate, color=Model)) +
scale_x_discrete(breaks=seq(5, 25, 5)) +
labs(x="Observation", y="Unobserved Water Depth") +
stat_summary(fun.y = mean, geom = "line", aes(Observation, Covariate, group=Model)) +
stat_summary(fun.ymin = function(z) {quantile(z, 0.025)},
fun.ymax = function(z) {quantile(z, 0.975)},
geom = "ribbon", aes(Observation, Covariate,
group=Model, color=Model, fill=Model), alpha=0.25) +
facet_wrap(~Model, ncol=2) +
ylim(-10, 60)
## Warning: Removed 2148 rows containing non-finite values (stat_summary).
## Warning: Removed 2148 rows containing non-finite values (stat_summary).
# fileToLoad <- read.csv("~/Documents/connoR/Caribou/cariboucounts.csv")
## Caribou Bog reconstruction counts; species columns are 10:60
fileToLoad <- read.csv("~/testate/data/rawData/cariboucounts.csv")
y_recon <- fileToLoad[,10:60]
#source("~/Documents/connoR/testate/data/join-testate-caribou.R")
## total count before alignment, for the data-loss report below.
## NOTE(review): this block duplicates the Hole Bog alignment logic above
## verbatim -- a shared helper function would remove the duplication.
N_recon_full <- sum(y_recon)
idx1 <- colnames(y) %in% names(y_recon)
# names(y_recon)[!idx1]
# names(y_cal)[!idx2]
## add in any species in calibration data not in reconstruction data
for (i in colnames(y)[!idx1]){
y_recon[, i] <- rep(0, dim(y_recon)[1])
}
idx2 <- names(y_recon) %in% colnames(y)
## remove species in reconstruction data not in calibration data
for (i in names(y_recon)[!idx2]){
y_recon[, i] <- NULL
}
## sort columns alphabetically to match the calibration ordering
y_recon <- y_recon[, order(colnames(y_recon))]
## check the names and orderings
all.equal(colnames(y), colnames(y_recon))
## [1] TRUE
## Note: we are reducing the testate counts by about 25%...
N_recon_reduced <- sum(y_recon)
N_recon_full - N_recon_reduced
## [1] 9046
## [1] 9046
## MVGP inverse prediction for the Caribou Bog core, overwriting the Hole
## Bog `pred` object (downstream code reuses the same variable names)
pred <- predict_compositional_data(as.matrix(y_recon), X,
params=params, samples=samples,
progress_directory=progress_directory,
progress_file = "DM-predict.txt")
## Starting predictions, running for 4000 iterations
## Bug - ESS for X shrunk to current position
## Bug - ESS for X shrunk to current position
## Bug - ESS for X shrunk to current position
## Bug - ESS for X shrunk to current position
## Bug - ESS for X shrunk to current position
## Bug - ESS for X shrunk to current position
## Bug - ESS for X shrunk to current position
## Bug - ESS for X shrunk to current position
## Bug - ESS for X shrunk to current position
## Bug - ESS for X shrunk to current position
## Bug - ESS for X shrunk to current position
## Bug - ESS for X shrunk to current position
## Bug - ESS for X shrunk to current position
## Prediction iteration 100
## Prediction iteration 200
## Prediction iteration 300
## Prediction iteration 400
## Prediction iteration 500
## Prediction iteration 600
## Prediction iteration 700
## Prediction iteration 800
## Prediction iteration 900
## Prediction iteration 1000
## Prediction iteration 1100
## Prediction iteration 1200
## Prediction iteration 1300
## Prediction iteration 1400
## Prediction iteration 1500
## Prediction iteration 1600
## Prediction iteration 1700
## Prediction iteration 1800
## Prediction iteration 1900
## Prediction iteration 2000
## Prediction iteration 2100
## Prediction iteration 2200
## Prediction iteration 2300
## Prediction iteration 2400
## Prediction iteration 2500
## Prediction iteration 2600
## Prediction iteration 2700
## Prediction iteration 2800
## Prediction iteration 2900
## Prediction iteration 3000
## Prediction iteration 3100
## Prediction iteration 3200
## Prediction iteration 3300
## Prediction iteration 3400
## Prediction iteration 3500
## Prediction iteration 3600
## Prediction iteration 3700
## Prediction iteration 3800
## Prediction iteration 3900
## Prediction iteration 4000
## Dimensions of the Caribou posterior prediction matrix
n_samples <- length(pred$X[, 1])
N_pred <- length(pred$X[1, ])
## Middle 95% of sorted posterior draws per observation (same convention
## as the Hole Bog block; assumes the index bounds are whole numbers)
X_ci <- apply(pred$X, 2, sort)[(0.025*n_samples+1):(0.975*n_samples), ]
caribou.df <- data.frame(
Covariate=c(X_ci),
Observation=factor(rep((1:N_pred), each=n_samples*0.95))
)
## Violin of the trimmed posterior water table depth per core sample
ggplot(caribou.df, aes(Observation, Covariate)) +
geom_violin(position="identity") +
scale_x_discrete(breaks=seq(5, 310, 5)) +
labs(x="Observation", y="Unobserved WTD")
## Convert calibration (y) and reconstruction (y_recon) counts to row-wise
## proportions for the transfer-function models below. Vectorized with
## rowSums() -- identical values to the original per-row loops (which also
## used the unsafe 1:N pattern); a row summing to zero yields NaN either way.
y_prop <- y / rowSums(y)
y_recon_prop <- y_recon / rowSums(y_recon)
## WA reconstruction - subset to deal with all zero occurrence species
## (WA cannot estimate an optimum for a species never observed)
zeros_idx <- which(colSums(y_prop) == 0)
if (length(zeros_idx) > 0) {
modWA <- rioja::WA(y_prop[, - zeros_idx], X)
## bootstrap (nboot=1000) to get sample-specific prediction errors
predWA <- predict(modWA, y_recon_prop[, - zeros_idx], sse=TRUE, nboot=1000)
} else {
## no data to subset
modWA <- rioja::WA(y_prop, X)
predWA <- predict(modWA, y_recon_prop, sse=TRUE, nboot=1000)
}
## Bootstrapping for SSE:
##
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|== | 4%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|==== | 7%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|====== | 10%
|
|======= | 10%
|
|======= | 11%
|
|======= | 12%
|
|======== | 12%
|
|======== | 13%
|
|========= | 13%
|
|========= | 14%
|
|========= | 15%
|
|========== | 15%
|
|========== | 16%
|
|=========== | 16%
|
|=========== | 17%
|
|=========== | 18%
|
|============ | 18%
|
|============ | 19%
|
|============= | 19%
|
|============= | 20%
|
|============= | 21%
|
|============== | 21%
|
|============== | 22%
|
|=============== | 22%
|
|=============== | 23%
|
|=============== | 24%
|
|================ | 24%
|
|================ | 25%
|
|================= | 25%
|
|================= | 26%
|
|================= | 27%
|
|================== | 27%
|
|================== | 28%
|
|=================== | 28%
|
|=================== | 29%
|
|=================== | 30%
|
|==================== | 30%
|
|==================== | 31%
|
|==================== | 32%
|
|===================== | 32%
|
|===================== | 33%
|
|====================== | 33%
|
|====================== | 34%
|
|====================== | 35%
|
|======================= | 35%
|
|======================= | 36%
|
|======================== | 36%
|
|======================== | 37%
|
|======================== | 38%
|
|========================= | 38%
|
|========================= | 39%
|
|========================== | 39%
|
|========================== | 40%
|
|========================== | 41%
|
|=========================== | 41%
|
|=========================== | 42%
|
|============================ | 42%
|
|============================ | 43%
|
|============================ | 44%
|
|============================= | 44%
|
|============================= | 45%
|
|============================== | 45%
|
|============================== | 46%
|
|============================== | 47%
|
|=============================== | 47%
|
|=============================== | 48%
|
|================================ | 48%
|
|================================ | 49%
|
|================================ | 50%
|
|================================= | 50%
|
|================================= | 51%
|
|================================= | 52%
|
|================================== | 52%
|
|================================== | 53%
|
|=================================== | 53%
|
|=================================== | 54%
|
|=================================== | 55%
|
|==================================== | 55%
|
|==================================== | 56%
|
|===================================== | 56%
|
|===================================== | 57%
|
|===================================== | 58%
|
|====================================== | 58%
|
|====================================== | 59%
|
|======================================= | 59%
|
|======================================= | 60%
|
|======================================= | 61%
|
|======================================== | 61%
|
|======================================== | 62%
|
|========================================= | 62%
|
|========================================= | 63%
|
|========================================= | 64%
|
|========================================== | 64%
|
|========================================== | 65%
|
|=========================================== | 65%
|
|=========================================== | 66%
|
|=========================================== | 67%
|
|============================================ | 67%
|
|============================================ | 68%
|
|============================================= | 68%
|
|============================================= | 69%
|
|============================================= | 70%
|
|============================================== | 70%
|
|============================================== | 71%
|
|============================================== | 72%
|
|=============================================== | 72%
|
|=============================================== | 73%
|
|================================================ | 73%
|
|================================================ | 74%
|
|================================================ | 75%
|
|================================================= | 75%
|
|================================================= | 76%
|
|================================================== | 76%
|
|================================================== | 77%
|
|================================================== | 78%
|
|=================================================== | 78%
|
|=================================================== | 79%
|
|==================================================== | 79%
|
|==================================================== | 80%
|
|==================================================== | 81%
|
|===================================================== | 81%
|
|===================================================== | 82%
|
|====================================================== | 82%
|
|====================================================== | 83%
|
|====================================================== | 84%
|
|======================================================= | 84%
|
|======================================================= | 85%
|
|======================================================== | 85%
|
|======================================================== | 86%
|
|======================================================== | 87%
|
|========================================================= | 87%
|
|========================================================= | 88%
|
|========================================================== | 88%
|
|========================================================== | 89%
|
|========================================================== | 90%
|
|=========================================================== | 90%
|
|=========================================================== | 91%
|
|=========================================================== | 92%
|
|============================================================ | 92%
|
|============================================================ | 93%
|
|============================================================= | 93%
|
|============================================================= | 94%
|
|============================================================= | 95%
|
|============================================================== | 95%
|
|============================================================== | 96%
|
|=============================================================== | 96%
|
|=============================================================== | 97%
|
|=============================================================== | 98%
|
|================================================================ | 98%
|
|================================================================ | 99%
|
|=================================================================| 99%
|
|=================================================================| 100%
## WA point predictions (first fit column) and bootstrap standard errors
pred_mu_WA <- predWA$fit[, 1]
pred_sd_WA <- sqrt(predWA$v1.boot[, 1])
## MLRC reconstruction - subset to deal with all zero occurrence species
zeros_idx <- which(colSums(y_prop) == 0)
if (length(zeros_idx) > 0) {
modMLRC <- rioja::MLRC(y_prop[, - zeros_idx], X)
predMLRC <- predict(modMLRC, y_recon_prop[, - zeros_idx],
sse=TRUE, nboot=1000)
} else {
## no all-zero species to drop
modMLRC <- rioja::MLRC(y_prop, X)
predMLRC <- predict(modMLRC, y_recon_prop, sse=TRUE, nboot=1000)
}
## Bootstrapping for SSE:
##
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|== | 4%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|==== | 7%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|====== | 10%
|
|======= | 10%
|
|======= | 11%
|
|======= | 12%
|
|======== | 12%
|
|======== | 13%
|
|========= | 13%
|
|========= | 14%
|
|========= | 15%
|
|========== | 15%
|
|========== | 16%
|
|=========== | 16%
|
|=========== | 17%
|
|=========== | 18%
|
|============ | 18%
|
|============ | 19%
|
|============= | 19%
|
|============= | 20%
|
|============= | 21%
|
|============== | 21%
|
|============== | 22%
|
|=============== | 22%
|
|=============== | 23%
|
|=============== | 24%
|
|================ | 24%
|
|================ | 25%
|
|================= | 25%
|
|================= | 26%
|
|================= | 27%
|
|================== | 27%
|
|================== | 28%
|
|=================== | 28%
|
|=================== | 29%
|
|=================== | 30%
|
|==================== | 30%
|
|==================== | 31%
|
|==================== | 32%
|
|===================== | 32%
|
|===================== | 33%
|
|====================== | 33%
|
|====================== | 34%
|
|====================== | 35%
|
|======================= | 35%
|
|======================= | 36%
|
|======================== | 36%
|
|======================== | 37%
|
|======================== | 38%
|
|========================= | 38%
|
|========================= | 39%
|
|========================== | 39%
|
|========================== | 40%
|
|========================== | 41%
|
|=========================== | 41%
|
|=========================== | 42%
|
|============================ | 42%
|
|============================ | 43%
|
|============================ | 44%
|
|============================= | 44%
|
|============================= | 45%
|
|============================== | 45%
|
|============================== | 46%
|
|============================== | 47%
|
|=============================== | 47%
|
|=============================== | 48%
|
|================================ | 48%
|
|================================ | 49%
|
|================================ | 50%
|
|================================= | 50%
|
|================================= | 51%
|
|================================= | 52%
|
|================================== | 52%
|
|================================== | 53%
|
|=================================== | 53%
|
|=================================== | 54%
|
|=================================== | 55%
|
|==================================== | 55%
|
|==================================== | 56%
|
|===================================== | 56%
|
|===================================== | 57%
|
|===================================== | 58%
|
|====================================== | 58%
|
|====================================== | 59%
|
|======================================= | 59%
|
|======================================= | 60%
|
|======================================= | 61%
|
|======================================== | 61%
|
|======================================== | 62%
|
|========================================= | 62%
|
|========================================= | 63%
|
|========================================= | 64%
|
|========================================== | 64%
|
|========================================== | 65%
|
|=========================================== | 65%
|
|=========================================== | 66%
|
|=========================================== | 67%
|
|============================================ | 67%
|
|============================================ | 68%
|
|============================================= | 68%
|
|============================================= | 69%
|
|============================================= | 70%
|
|============================================== | 70%
|
|============================================== | 71%
|
|============================================== | 72%
|
|=============================================== | 72%
|
|=============================================== | 73%
|
|================================================ | 73%
|
|================================================ | 74%
|
|================================================ | 75%
|
|================================================= | 75%
|
|================================================= | 76%
|
|================================================== | 76%
|
|================================================== | 77%
|
|================================================== | 78%
|
|=================================================== | 78%
|
|=================================================== | 79%
|
|==================================================== | 79%
|
|==================================================== | 80%
|
|==================================================== | 81%
|
|===================================================== | 81%
|
|===================================================== | 82%
|
|====================================================== | 82%
|
|====================================================== | 83%
|
|====================================================== | 84%
|
|======================================================= | 84%
|
|======================================================= | 85%
|
|======================================================== | 85%
|
|======================================================== | 86%
|
|======================================================== | 87%
|
|========================================================= | 87%
|
|========================================================= | 88%
|
|========================================================== | 88%
|
|========================================================== | 89%
|
|========================================================== | 90%
|
|=========================================================== | 90%
|
|=========================================================== | 91%
|
|=========================================================== | 92%
|
|============================================================ | 92%
|
|============================================================ | 93%
|
|============================================================= | 93%
|
|============================================================= | 94%
|
|============================================================= | 95%
|
|============================================================== | 95%
|
|============================================================== | 96%
|
|=============================================================== | 96%
|
|=============================================================== | 97%
|
|=============================================================== | 98%
|
|================================================================ | 98%
|
|================================================================ | 99%
|
|=================================================================| 99%
|
|=================================================================| 100%
## MLRC point predictions and bootstrap standard errors
pred_mu_MLRC <- predMLRC$fit[, 1]
pred_sd_MLRC <- sqrt(predMLRC$v1.boot[, 1])
## Modern analogue technique (analogue::mat), bootstrapped; mean/sd taken
## over the bootstrap predictions per reconstruction sample
modMAT <- mat(y, X)
predMAT <- predict(modMAT, y_recon, bootstrap=TRUE, n.boot=1000)
pred_mu_MAT <- apply(predMAT$predictions$model$predicted, 2, mean)
pred_sd_MAT <- apply(predMAT$predictions$model$predicted, 2, sd)
## GJAM model fit (currently disabled -- only the setup runs).
## Calibration and reconstruction data are stacked, with the covariate set
## to NA on the held-out rows so gjam would inversely predict it (PREDICTX)
idx_hold <- (N+1):(N+N_pred)
Xdf <- data.frame(x=c(X, rep(NA, N_pred)))
Xdf$x[idx_hold] <- NA
ydf <- data.frame(as.matrix(rbind(y, y_recon)))
colnames(ydf) <- paste("y", 1:dim(y)[2], sep="")
## "CC" = composition count response type for every species column
ml <- list(ng = 5000, burnin = 500, typeNames = rep("CC", dim(y)[2]),
PREDICTX=TRUE)
## fit second order polynomial model
# out <- gjam(~ x + I(x^2), Xdf, ydf, ml)
#
# pred_mu_GJAM <- out$prediction$xpredMu[idx_hold, 2] #inverse prediction of x
# pred_sd_GJAM <- out$prediction$xpredSd[idx_hold, 2]
## Random Forest
library(randomForest)
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
## Regression forest: covariate as response, species counts as predictors
train <- data.frame(y=X, as.matrix(y))
test <- data.frame(y_recon)
## ntree matched to the MCMC sample count so per-tree predictions can be
## treated as pseudo posterior draws alongside the other methods
n_samples <- length(pred$X[, 1])
rf <- randomForest(y ~ ., data = train, ntree=n_samples)
## predict.all=TRUE: $individual is an (observation x tree) matrix
preds_rf <- predict(rf, test, predict.all=TRUE)$individual
n_samples <- length(pred$X[, 1])
N_pred <- length(pred$X[1, ])
## Pseudo posterior samples for the comparison methods (normal
## approximations from each method's mean/sd), as in the Hole Bog block
preds_WA <- matrix(0, n_samples, N_pred)
preds_MLRC <- matrix(0, n_samples, N_pred)
preds_MAT <- matrix(0, n_samples, N_pred)
# preds_GJAM <- matrix(0, n_samples, d)
for (i in 1:N_pred) {
preds_WA[, i] <- rnorm(n_samples, pred_mu_WA[i], pred_sd_WA[i])
preds_MLRC[, i] <- rnorm(n_samples, pred_mu_MLRC[i], pred_sd_MLRC[i])
preds_MAT[, i] <- rnorm(n_samples, pred_mu_MAT[i], pred_sd_MAT[i])
# preds_GJAM[, j] <- rnorm(n_samples, pred_mu_GJAM, pred_sd_GJAM)
}
## Stack the trimmed middle-95% samples of all five methods; preds_rf is
## (observation x tree), hence the margin-1 apply for the RF rows
X_ci <- rbind(apply(pred$X, 2, sort)[(0.025*n_samples+1):(0.975*n_samples), ],
apply(preds_WA, 2, sort)[(0.025*n_samples+1):(0.975*n_samples), ],
apply(preds_MLRC, 2, sort)[(0.025*n_samples+1):(0.975*n_samples), ],
apply(preds_MAT, 2, sort)[(0.025*n_samples+1):(0.975*n_samples), ],
apply(preds_rf, 1, sort)[(0.025*n_samples+1):(0.975*n_samples), ])
# apply(preds_GJAM, 2, sort)[(0.025*n_samples+1):(0.975*n_samples), ])
## Column-major flattening: per observation, 5 stacked model segments of
## 0.95*n_samples draws each; the Model labels recycle to match
caribou.df <- data.frame(
Covariate=c(X_ci),
Observation=factor(rep((1:N_pred), each=5*n_samples*0.95)),
Model=rep(c("MVGP", "WA", "MLRC", "MAT", "RF"), each=n_samples*0.95)
)
## Faceted violins with a posterior-mean line per model
ggplot(caribou.df, aes(Observation, Covariate, color=Model)) +
geom_violin(position="dodge") +
scale_x_discrete(breaks=seq(5, 310, 10)) +
labs(x="Observation", y="Unobserved Water Depth") +
facet_wrap(~ Model, ncol=2) +
stat_summary(fun.y = mean, geom = "line", aes(Observation, Covariate, group=Model))
ggsave("caribou_recons.png")
## Saving 7 x 5 in image
## Mean reconstruction per model with a reversed depth axis (depth-below-
## surface convention).
## NOTE(review): fun.y/fun.ymin/fun.ymax are deprecated in ggplot2 >= 3.3
## in favor of fun/fun.min/fun.max -- works but warns on modern ggplot2.
ggplot(caribou.df, aes(Observation, Covariate, color=Model)) +
#geom_violin(position="dodge") +
scale_x_discrete(breaks=seq(310, 0, -10)) +
labs(x="Observation", y="Unobserved Water Depth") +
facet_wrap(~ Model, ncol=2) +
stat_summary(fun.y = mean, geom = "line", aes(Observation, Covariate, group=Model)) +
scale_y_reverse()
## All models overlaid: mean lines + 95% quantile ribbons; ylim clips
## values outside (-10, 60), producing the "Removed ... rows" warnings
ggplot(caribou.df, aes(Observation, Covariate, color=Model)) +
labs(x="Observation", y="Unobserved Water Depth") +
stat_summary(fun.y = mean, geom = "line", aes(Observation, Covariate, group=Model)) +
stat_summary(fun.ymin = function(z) {quantile(z, 0.025)},
fun.ymax = function(z) {quantile(z, 0.975)},
geom = "ribbon", aes(Observation, Covariate,
group=Model, color=Model, fill=Model), alpha=0.25) +
ylim(-10, 60)
## Warning: Removed 15810 rows containing non-finite values (stat_summary).
## Warning: Removed 15810 rows containing non-finite values (stat_summary).
## MVGP vs. WA only, mean lines (ribbons commented out)
ggplot(subset(caribou.df, Model %in% c("MVGP", "WA")), aes(Observation, Covariate, color=Model)) +
labs(x="Observation", y="Unobserved Water Depth") +
stat_summary(fun.y = mean, geom = "line", aes(Observation, Covariate, group=Model)) +
# stat_summary(fun.ymin = function(z) {quantile(z, 0.025)},
# fun.ymax = function(z) {quantile(z, 0.975)},
# geom = "ribbon", aes(Observation, Covariate,
# group=Model, color=Model, fill=Model), alpha=0.25) +
ylim(-10, 60)
## Warning: Removed 230 rows containing non-finite values (stat_summary).
## Mean lines + ribbons, faceted one panel per model
ggplot(caribou.df, aes(Observation, Covariate, color=Model)) +
labs(x="Observation", y="Unobserved Water Depth") +
stat_summary(fun.y = mean, geom = "line", aes(Observation, Covariate, group=Model)) +
stat_summary(fun.ymin = function(z) {quantile(z, 0.025)},
fun.ymax = function(z) {quantile(z, 0.975)},
geom = "ribbon", aes(Observation, Covariate,
group=Model, color=Model, fill=Model), alpha=0.25) +
facet_wrap(~Model, ncol=2) +
ylim(-10, 60)
## Warning: Removed 15810 rows containing non-finite values (stat_summary).
## Warning: Removed 15810 rows containing non-finite values (stat_summary).